How is climate change affecting global crop production? I will try to answer this question through several analyses of data from the Food and Agriculture Organization of the United Nations (FAO), which can be found here: http://www.fao.org/faostat/en/?#data/QC
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Suppress warnings from pandas
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import metrics
from sklearn.metrics import precision_score ,recall_score, f1_score
from sklearn.metrics import accuracy_score
import chart_studio
# Chart Studio credentials are taken from the environment so that the API key
# is never stored in the notebook. The key that used to be hard-coded here has
# been exposed and should be regenerated (profile > settings > regenerate key).
import os
username = os.environ.get('CHART_STUDIO_USERNAME', 'omegamarkos')  # your username
api_key = os.environ.get('CHART_STUDIO_API_KEY')  # your api key
if api_key:
    # Only (re)write the credentials file when a key was actually supplied;
    # otherwise leave whatever credentials are already configured untouched.
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
from plotly.offline import iplot, init_notebook_mode,download_plotlyjs
import plotly.graph_objects as go
import plotly.express as px
# Use this setting to display the output of every expression in a cell instead of only the last one (see the next cell).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Total yearly crop production in tonnes by country.
df_prod = pd.read_csv('FAOSTAT_crop_production.csv')
df_prod.head()
# Shorten the item label 'Rice, paddy' to 'Rice' so it is easier to filter on,
# and rename the generic 'Value' column to 'prod' to avoid confusion later.
df_prod = df_prod.assign(
    Item=df_prod['Item'].str.replace('Rice, paddy', 'Rice')
).rename(columns={'Value': 'prod'})
df_prod['Area'].unique()
def _top10_bar(frame, value_col, title):
    """Return a bar chart of ``value_col`` per ``Area`` for the rows of ``frame``.

    The bar labels show ``value_col`` with the '%{text:.2s}' template, placed
    outside the bars, and the chart is given ``title``.
    """
    chart = px.bar(frame, y=value_col, x='Area', text=value_col)
    chart.update_traces(texttemplate='%{text:.2s}', textposition='outside')
    chart.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title=title)
    return chart


df_prod_2018 = df_prod.query("Year == 2018")

# Wheat: ten largest producers in 2018.
df_prod_2018_wheat = df_prod_2018.query("Item == 'Wheat'")
df_prod_2018_wheat_top = df_prod_2018_wheat.nlargest(10, ['prod'])
df_prod_2018_wheat_top.head()
fig = _top10_bar(df_prod_2018_wheat_top, 'prod', 'Top Wheat Producers')
fig.show()
# To open the plot on chart studio web
import chart_studio.plotly as py
py.plot(fig, filename='wheat producers', auto_open=True)

# Rice: ten largest producers in 2018.
df_prod_2018_rice = df_prod_2018.query("Item == 'Rice'")
df_prod_2018_rice_top = df_prod_2018_rice.nlargest(10, ['prod'])
fig = _top10_bar(df_prod_2018_rice_top, 'prod', 'Top Rice Producers')
fig.show()

# Maize: ten largest producers in 2018.
df_prod_2018_maize = df_prod_2018.query("Item == 'Maize'")
df_prod_2018_maize_top = df_prod_2018_maize.nlargest(10, ['prod'])
fig = _top10_bar(df_prod_2018_maize_top, 'prod', 'Top Maize Producers')
fig.show()
# fig = px.bar(df_prod_rice, x="Area", y= "prod").update_xaxes(categoryorder="max descending")
# fig.show()
# df_prod holds one row per crop (Wheat, Rice, Maize, ...) for each country and
# year. The map is titled as a total, so sum the crops first; this also leaves
# exactly one row per location in every animation frame.
# Sorting by year keeps the animation frames in chronological order.
df_prod_total = (
    df_prod.groupby(['Area', 'Year'], as_index=False)['prod']
    .sum()
    .sort_values('Year')
)
fig = px.choropleth(
    df_prod_total,
    locations="Area",
    locationmode="country names",
    animation_frame="Year",
    animation_group="Area",
    color="prod",
    color_continuous_scale='YlGn',
    hover_name="Area",
    title='Total crop production by Country',
)
# Base-map styling: coastlines, land, ocean, lakes and rivers.
fig.update_geos(
    resolution=50,
    showcoastlines=True, coastlinecolor="RebeccaPurple",
    showland=True, landcolor="LightGreen",
    showocean=True, oceancolor="LightBlue",
    showlakes=True, lakecolor="Blue",
    showrivers=True, rivercolor="Blue",
)
fig.show()
# Crop yield per hectare.
df_crop = pd.read_csv('FAOSTAT_data_main_crop_yield.csv')
df_crop.head()
from pandas_profiling import ProfileReport
# Profiling report of the raw yield table, displayed as the cell output.
crop_profile = ProfileReport(df_crop)
crop_profile
# Discard every row that has a missing value.
df_crop = df_crop.dropna()
df_crop.columns
# 'Value' becomes 'yield' so it is not confused with other value columns.
df_crop = df_crop.rename(columns={'Value': 'yield'})
df_crop.columns
We only need the following features from the data; the rest are either constant or irrelevant:
['Area Code', 'Area', 'Item Code', 'Item', 'Year', 'Value']
# Keep only the identifying columns plus the yield itself.
yield_columns = ['Area Code', 'Area', 'Item Code', 'Item', 'Year', 'yield']
df_crop = df_crop.loc[:, yield_columns]
df_crop.tail()
# def getFiltered(df,col):
# df_sub=df.query("Item = {}".format(col))
# df_sub_max = df_sub.groupby(['Year'], as_index = False)['Value'].max()
# return df_sub_max
# Shorten the item label 'Rice, paddy' to 'Rice'.
df_crop = df_crop.assign(Item=df_crop['Item'].str.replace('Rice, paddy', 'Rice'))
df_crop['Item'].unique()
df_crop_2018 = df_crop[df_crop['Year'] == 2018]

# Rice: the ten highest yields in 2018, as a pie chart and as a bar chart.
df_crop_2018_rice = df_crop_2018[df_crop_2018['Item'] == 'Rice']
df_crop_2018_rice_top = df_crop_2018_rice.nlargest(10, 'yield')
df_crop_2018_rice_top.head()
fig = px.pie(
    df_crop_2018_rice_top,
    names='Area',
    values='yield',
    title='top high rice yield ',
    hover_data=['yield'],
    labels={'yield': ' crop yield'},
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
fig = px.bar(df_crop_2018_rice_top, x='Area', y='yield', text='yield')
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title='Top Rice Yield')
fig.show()

# Wheat: the ten highest yields in 2018.
df_crop_2018_wheat = df_crop_2018[df_crop_2018['Item'] == 'Wheat']
df_crop_2018_wheat_top = df_crop_2018_wheat.nlargest(10, 'yield')
fig = px.bar(df_crop_2018_wheat_top, x='Area', y='yield', text='yield')
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title='Top Wheat Yield')
fig.show()

# Maize: the ten highest yields in 2018.
df_crop_2018_maize = df_crop_2018[df_crop_2018['Item'] == 'Maize']
df_crop_2018_maize_top = df_crop_2018_maize.nlargest(10, 'yield')
fig = px.bar(df_crop_2018_maize_top, x='Area', y='yield', text='yield')
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title='Top Maize Yield')
fig.show()
# Region (country group) lookup for the country-level data.
df_reg = pd.read_csv('regional_code.csv')
df_reg.head()
df_region = df_reg.loc[:, ['Country Group', 'Country Code']]
df_region.head()
df_region.isna().sum()
df_crop.head()
# Attach the country group to every yield row; rows without a matching
# 'Country Code' keep NaN in the region columns (left join).
df_crop_reg = df_crop.merge(
    df_region,
    how='left',
    left_on='Area Code',
    right_on='Country Code',
)
df_crop_reg.head()
# df_crop_reg_profile = ProfileReport(df_crop_reg)
# df_crop_reg_profile
# Drop rows that did not get a region in the merge (or have other gaps).
df_crop_reg = df_crop_reg.dropna()
# 'Year' is numeric (the 2018 filter on df_crop compares it with the integer
# 2018), so compare against an int; the string '2017' would select no rows.
df_crop_reg_2017 = df_crop_reg.query("Year == 2017")
import folium
url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
country_shapes = f'{url}/world-countries.json'
the_map = folium.Map()
# Map.choropleth() is deprecated and missing from newer folium releases; the
# Choropleth class takes the same arguments and is added to the map explicitly.
# NOTE(review): df_crop_reg_2017 still holds one row per crop for each area, so
# several 'yield' values map to the same country - confirm which one is wanted.
folium.Choropleth(
    geo_data=country_shapes,
    name='choropleth',
    data=df_crop_reg_2017,
    columns=['Area', 'yield'],
    key_on='feature.properties.name',
    fill_color='Reds',
    nan_fill_color='white',
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(the_map)
folium.LayerControl().add_to(the_map)
the_map
# Maize-only yield map, built from the 2017 subset.
df_crop_reg_2017_m = df_crop_reg_2017.query("Item =='Maize'")
# `country_shapes` (the world-countries GeoJSON URL) is already defined where
# the first folium map is built, so it is reused here instead of redefined.
maize_map = folium.Map()
# Map.choropleth() is deprecated and missing from newer folium releases; the
# Choropleth class takes the same arguments and is added to the map explicitly.
folium.Choropleth(
    geo_data=country_shapes,
    name='choropleth',
    data=df_crop_reg_2017_m,
    columns=['Area', 'yield'],
    key_on='feature.properties.name',
    fill_color='Reds',
    nan_fill_color='white',
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(maize_map)
folium.LayerControl().add_to(maize_map)
maize_map
# Temperature change table.
df_temp = pd.read_csv('global_temp_change.csv')
df_temp.head()
#df_temp.Area.unique()
df_temp.columns
# 'Value' becomes 'temperature' so it is not confused with other value columns.
df_temp = df_temp.rename(columns={'Value': 'temperature'})
df_temp.columns
We only need the following features from the data; the rest are either constant or irrelevant:
['Area Code', 'Area', 'Months Code', 'Months', 'Year', 'Value']
# Keep only the identifying columns plus the temperature value.
temp_columns = ['Area Code', 'Area', 'Months Code', 'Months', 'Year', 'temperature']
df_temp = df_temp.loc[:, temp_columns]
df_temp.head()
df_temp.isna().sum()
df_temp = df_temp.dropna()
df_temp['Area'].unique()
# Non-negative copy of the anomaly (negative values become 0), because the
# animation doesn't work for negative values.
df_temp['pos_temp'] = df_temp['temperature'].mask(df_temp['temperature'] < 0, 0)
df_temp.head()
# Keep only the rows for the whole meteorological year (the yearly anomaly).
df_temp_metro = df_temp[df_temp['Months'] == 'Meteorological year']
df_temp_metro.head()
# The 20 largest yearly anomalies across all areas and years.
df_temp_top_20 = df_temp_metro.nlargest(20, 'temperature')
df_temp_top_20
# Scatter of those 20 anomalies; marker size follows the anomaly.
fig = px.scatter(
    df_temp_top_20,
    x="Year",
    y="temperature",
    size='temperature',
    title='Highest Temperature Anomaly since 1960',
)
fig.show()
import plotly.io as pio
# World map of the yearly anomaly, animated by year; coloured by the
# non-negative 'pos_temp' column.
fig = px.choropleth(
    df_temp_metro,
    locations="Area",
    locationmode="country names",
    animation_frame="Year",
    animation_group="Area",
    color="pos_temp",
    color_continuous_scale='reds',
    hover_name="Area",
    title='Global Temperature Anomaly',
)
fig.show()
# Also save the animation as a standalone HTML file and open it.
pio.write_html(fig, file='temp.html', auto_open=True)
df_co2_all = pd.read_csv('WorldCo2TotalWorldBank.csv')
df_co2_all.head()
# Melt data from wide format to long format: every column that is not an id
# column becomes a ('year', 'Value') pair.
df_co2 = pd.melt(
    df_co2_all,
    id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
    var_name='year',
    value_name='Value',
)
df_co2.head()
df_co2.isna().sum()
# Forward-fill missing values within each country only. After the melt the rows
# are stacked one 'year' column after another, so a plain forward fill over the
# whole column would copy the previous *country's* value into the gap.
# Gaps at the start of a country's series have nothing to fill from and stay NaN.
df_co2['Value'] = df_co2.groupby('Country Code')['Value'].ffill()
df_co2.isna().sum()
df_co2 = df_co2.drop(['Indicator Name', 'Indicator Code'], axis=1)
df_co2.head()
df_co2.rename(columns={'Country Name': 'CountryName', 'Country Code': 'CountryCode', 'Value': 'co2'}, inplace=True)
df_co2['CountryName'].unique()
df_temp_metro.head()
# Largest yearly anomaly over all areas, one row per year.
df_temp_metro_world_max = df_temp_metro.groupby('Year', as_index=False)['temperature'].max()
df_temp_metro_world_max.tail()
# CO2 rows of the 'World' aggregate only.
df_co2_world = df_co2[df_co2['CountryName'] == 'World']
df_co2_world.head()
df_co2_world.info()
df_temp_metro_world_max.info()
# Cast the year to text so it can be joined with the 'year' column of the
# melted CO2 table.
df_temp_metro_world_max['Year'] = df_temp_metro_world_max['Year'].astype(str)
df_temp_co2 = df_temp_metro_world_max.merge(
    df_co2_world,
    how='left',
    left_on='Year',
    right_on='year',
)
df_temp_co2.head()
# Years without a CO2 match are removed.
df_temp_co2 = df_temp_co2.dropna()
df_temp_co2.head()
import plotly.express as px
# World CO2 per year as bars, coloured by that year's maximum anomaly.
fig = px.bar(
    df_temp_co2,
    x='Year',
    y='co2',
    color='temperature',
    hover_data=['co2', 'temperature'],
    labels={'co2': 'World co2 emission'},
    title='Global Co2 Emission & Temperature Anomaly',
    height=400,
)
fig.show()
# Publish the figure to Chart Studio as well.
py.plot(fig, filename='Co2ByTemperature', auto_open=True)
# Line chart of the 'World' CO2 series over the years.
fig = go.Figure()
# Both axes must come from the 'World' subset: taking y from the full df_co2
# table would pair the world's years with values of other countries.
trace = go.Scatter(x=df_co2_world["year"], y=df_co2_world["co2"], mode="lines+markers")
fig.add_trace(trace)
fig.update_layout(
    title={
        "text": "World Co2 Emission",
        "x": 0.5,
        "xanchor": "center",
    },
    xaxis_title="Year",
    yaxis_title="co2 emission",
)
fig.show()
# merge_asof needs both inputs sorted on the 'on' key.
df_crop_reg_sorted = df_crop_reg.sort_values('Year')
df_temp_sorted = df_temp.sort_values('Year')
# NOTE(review): this is an as-of join, not an exact join - each temperature row
# receives a single crop row of the same 'Area'; confirm this is the intended
# pairing rather than a plain merge on ['Area', 'Year'].
df_crop_temp = pd.merge_asof(
    left=df_temp_sorted,
    right=df_crop_reg_sorted,
    on='Year',
    by='Area',
)
df_crop_temp.head()
df_crop_reg.head()
df_crop_temp.isna().sum()
# Temperature rows that found no crop row are removed.
df_crop_temp = df_crop_temp.dropna()
df_crop_temp.shape
df_crop_temp.isna().sum()
df_crop_temp.head()
df_crop_temp['Months'].unique()
# Label every row 'low' when the anomaly lies in [0, 2] and 'high' otherwise.
# NOTE(review): negative anomalies therefore also end up in 'high' - confirm
# that this is intended.
temps = df_crop_temp['temperature']
df_crop_temp['temp'] = np.where(temps.between(0, 2), 'low', 'high')
df_crop_temp.head()
# Total yield per temperature label.
df_crop_temp_re = df_crop_temp.groupby('temp', as_index=False)['yield'].sum()
df_crop_temp_re.head()
# Create one module-level data frame per crop, named after the crop itself
# (the code below relies on Maize, Wheat and Rice existing).
product = df_crop_temp['Item'].unique()
for prod in product:
    globals()[prod] = df_crop_temp[df_crop_temp['Item'] == prod]
# Highest yield per year for each of the three crops.
Maize_max, Wheat_max, Rice_max = (
    crop_frame.groupby('Year', as_index=False)['yield'].max()
    for crop_frame in (Maize, Wheat, Rice)
)
import plotly.graph_objects as go
# One line per crop showing its highest yield in each year.
fig = go.Figure()
yearly_max_by_label = (
    ("Max Wheat", Wheat_max),
    ("Max Rice", Rice_max),
    ("Max Maize", Maize_max),
)
for label, yearly_max in yearly_max_by_label:
    fig.add_trace(
        go.Scatter(
            x=yearly_max["Year"],
            y=yearly_max["yield"],
            mode="lines+markers",
            name=label,
        )
    )
fig.update_layout(
    title={
        "text": "Wheat, Rice & Maize Maximumy yearly Production",
        "x": 0.5,
        "xanchor": "center",
    },
    xaxis_title="Year",
    yaxis_title="Yield per Hectare",
)
fig.show()
df_crop_temp = df_crop_temp.rename(columns={"Country Group": "region"})
df_crop_temp['region'].unique()
# Replace spaces and hyphens with underscores so that every region name can be
# used as a variable name below.
df_crop_temp['region'] = df_crop_temp['region'].str.replace(' ', '_')
df_crop_temp['region'].unique()
df_crop_temp['region'] = df_crop_temp['region'].str.replace('-', '_')
df_crop_temp['region'].unique()
# Create one module-level data frame per region, named after the region.
regions = df_crop_temp['region'].unique()
for reg in regions:
    region_rows = df_crop_temp['region'] == reg
    globals()[reg] = df_crop_temp[region_rows]
Africa.head()
# Average yield per year in Africa for each of the three crops.
Africa_maize = Africa.query("Item=='Maize'")
Africa_maize_ave = Africa_maize.groupby(['Year'], as_index=False)['yield'].mean()
Africa_rice = Africa.query("Item=='Rice'")
Africa_rice_ave = Africa_rice.groupby(['Year'], as_index=False)['yield'].mean()
Africa_wheat = Africa.query("Item=='Wheat'")
Africa_wheat_ave = Africa_wheat.groupby(['Year'], as_index=False)['yield'].mean()
fig = go.Figure()
# Plot the yearly averages computed above; the Africa_*_max frames that were
# referenced here are never defined and raised a NameError.
trace1 = go.Scatter(x=Africa_wheat_ave["Year"], y=Africa_wheat_ave["yield"], mode="lines+markers", name="Africa Wheat")
fig.add_trace(trace1)
trace2 = go.Scatter(x=Africa_rice_ave["Year"], y=Africa_rice_ave["yield"], mode="lines+markers", name="Africa Rice")
fig.add_trace(trace2)
trace3 = go.Scatter(x=Africa_maize_ave["Year"], y=Africa_maize_ave["yield"], mode="lines+markers", name="Africa Maize")
fig.add_trace(trace3)
fig.update_layout(
    title={
        "text": "Average Yearly Crop Production in Africa",
        "x": 0.5,
        "xanchor": "center",
    },
    xaxis_title="Year",
    yaxis_title="Yield per Hectare",
)
fig.show()
# Largest yearly anomaly over all areas, one row per year.
df_temp_metro_max = df_temp_metro.groupby(['Year'], as_index=False)['temperature'].max()
df_temp_metro_max.head()
# 'Year' is numeric here (it is used as the merge_asof key and never cast on
# this frame), so compare against integers; string literals would match nothing.
df_temp_metro_max_2018 = df_temp_metro_max.query("Year == 2018")
df_temp_metro_max_2017 = df_temp_metro_max.query("Year == 2017")
df_temp_metro_max_2016 = df_temp_metro_max.query("Year == 2016")
df_temp_metro_max_2015 = df_temp_metro_max.query("Year == 2015")
# import plotly.graph_objects as go
# fig = go.Figure()
# trace1 = go.Scatter(x=df_temp_metro_max_2018["Months"], y=df_temp_metro_max_2018["temperature"], mode="lines+markers", name="2018")
# fig.add_trace(trace1)
# trace2 = go.Scatter(x=df_temp_metro_max_2017["Months"], y=df_temp_metro_max_2017["temperature"], mode="lines+markers", name="2017")
# fig.add_trace(trace2)
# trace3 = go.Scatter(x=df_temp_metro_max_2016["Months"], y=df_temp_metro_max_2016["temperature"], mode="lines+markers", name="2016")
# fig.add_trace(trace3)
# fig.update_layout(
# title={
# "text": "Max Temp anomaly ",
# "x":0.5,
# "xanchor": "center"
# },
# xaxis_title="Year",
# yaxis_title="temp")
# fig.show()
Rice.head()
# Highest yield per year for each crop.
df_cropr_max, df_cropw_max, df_cropm_max = (
    crop_frame.groupby('Year', as_index=False)['yield'].max()
    for crop_frame in (Rice, Wheat, Maize)
)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Single plot with two y-axes: crop yields on the primary axis, the maximum
# temperature anomaly on the secondary axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
# (legend name, data frame, value column, drawn on the secondary axis?)
series_specs = (
    ("Max Wheat", df_cropw_max, "yield", False),
    ("Max Rice", df_cropr_max, "yield", False),
    ("Max Maize", df_cropm_max, "yield", False),
    ("Max Temp", df_temp_metro_max, "temperature", True),
)
for label, frame, value_col, on_secondary in series_specs:
    fig.add_trace(
        go.Scatter(x=frame["Year"], y=frame[value_col], mode="lines+markers", name=label),
        secondary_y=on_secondary,
    )
# Figure title and axis titles.
fig.update_layout(title_text="Max crop production & Maximum Temperature")
fig.update_xaxes(title_text="Year")
fig.update_yaxes(title_text="<b>crop production per Hectare</b> ", secondary_y=False)
fig.update_yaxes(title_text="<b>Temperature in celicious</b> ", secondary_y=True)
fig.show()
df_co2.head()
# Align the CO2 column names with the crop/temperature table ('Area', 'Year').
df_co2 = df_co2.rename(columns={'CountryName': 'Area', 'year': 'Year', 'Value': 'co2'})
df_co2.info()
# The year must be an integer to be usable as the merge_asof key.
df_co2['Year'] = df_co2['Year'].astype('int64')
df_co2.info()
df_crop_temp.info()
# merge_asof needs both inputs sorted on the 'on' key.
df_crop_temp_sorted = df_crop_temp.sort_values('Year')
df_co2_sorted = df_co2.sort_values('Year')
df_crop_temp_co2 = pd.merge_asof(
    left=df_co2_sorted,
    right=df_crop_temp_sorted,
    on='Year',
    by='Area',
)
df_crop_temp_co2.head()
# CO2 rows that found no crop/temperature row are removed.
df_crop_temp_co2 = df_crop_temp_co2.dropna()
df_co2.isna().sum()
df_crop_temp_co2.head()
df_crop_temp_co2['region'].unique()
# Restrict the combined table to three regions.
selected_regions = ['World', 'Asia', 'Africa']
df_crop_temp_co2_reg = df_crop_temp_co2[df_crop_temp_co2['region'].isin(selected_regions)]
df_crop_temp_co2_reg.head()
# Animated scatter, one facet per region: temperature against CO2, with the
# marker size following the yield.
fig = px.scatter(
    df_crop_temp_co2_reg,
    x="temperature",
    y="co2",
    size="yield",
    size_max=45,
    color="region",
    facet_col="region",
    hover_name="Area",
    animation_frame="Year",
    animation_group="Area",
    # range_y=[0,1000]
)
fig.show()
import plotly.express as px
# Animated bar chart of CO2 per region.
fig = px.bar(
    df_crop_temp_co2_reg,
    x="region",
    y="co2",
    color='co2',
    hover_name='Area',
    animation_frame='Year',
)
fig.show()
# The five rows with the largest CO2 value.
df_crop_temp_co2.nlargest(5, 'co2')
# df.nlargest(5, ['total exports'])